package dirty

import (
	"bufio"
	"fmt"
	"io"
)

// skipped is a one-rune push-back buffer shared by the tokenizer functions.
// When a function reads one rune past the end of a token it stores that rune
// here, and the next read takes it from here instead of from the reader.
// The value 0 means "nothing pending". Because this is package-level state,
// it is shared by every reader passed to the tokenizer.
//
// TODO: replace with reader.Peek().
var skipped rune

// tokenType identifies the lexical category of a token.
type tokenType int

// Token kinds. The numeric values come from iota, so the declaration order
// is significant and must not be changed.
const (
	// UNKNOWN is the zero value; nextToken_initial returns a zero token
	// (and therefore this kind) for a rune that starts no known token.
	UNKNOWN tokenType = iota
	// LBRACKET is an opening '('.
	LBRACKET
	// RBRACKET is a closing ')'.
	RBRACKET
	// STRING is a single-line string delimited by single quotes.
	STRING
	// STRING_RAW is a multi-line raw string delimited by backticks.
	STRING_RAW
	// NUMBER is a numeric literal, including 0b / 0o / 0x prefixed forms.
	NUMBER
	// FLOAT is not produced by the tokenizer loops in this file.
	// NOTE(review): presumably assigned by parseNumber - confirm.
	FLOAT
	// CONST is a bare word starting with 't', 'f' or 'n'.
	// NOTE(review): presumably true / false / null - confirm in parseConst.
	CONST
	// COMMENT starts at '#' and runs to the end of the line.
	COMMENT
	// EOF signals that the reader has no more input.
	EOF
)

// token is a single lexical unit produced by nextToken.
type token struct {
	// ttype is the kind of the token.
	ttype tokenType
	// t is the token text accumulated rune by rune while scanning.
	t     string
	// i is set by nextToken_initial to point at 0 for a lone "0" literal.
	// NOTE(review): presumably filled in by parseNumber for other integers - confirm.
	i     *int64
	// f is never assigned in this file.
	// NOTE(review): presumably the parsed value of a FLOAT token - confirm.
	f     *float64
}

// nextToken reads and returns the next token from reader.
//
// Scanning happens in two phases: nextToken_initial classifies the token by
// its first rune and may already complete it (brackets, EOF, a lone zero);
// otherwise nextToken_rest consumes the remaining runes of the token.
func nextToken(reader *bufio.Reader) (token, error) {
	first, done, err := nextToken_initial(reader)
	if err != nil || done {
		return first, err
	}
	return nextToken_rest(reader, first)
}
// nextToken_initial skips whitespace (space, tab, '\n', '\r') and classifies
// the next token by its first rune. A rune left in skipped by a previous call
// is consumed before anything is read from reader.
//
// The boolean result reports whether the returned token is already complete:
//
//   - true for '(' and ')', for end of input (an EOF token), for a lone "0"
//     (a NUMBER token whose i points at 0), and for a rune that starts no
//     known token (a zero token, i.e. UNKNOWN);
//   - false for tokens whose remaining runes still have to be read by
//     nextToken_rest: COMMENT ('#'), STRING_RAW ('`'), STRING ('\”),
//     CONST ('t', 'f', 'n') and NUMBER (everything else numeric, including
//     the "0b", "0o" and "0x" prefixes).
//
// A read failure other than io.EOF is returned wrapped, together with a zero
// token and true.
func nextToken_initial(reader *bufio.Reader) (token, bool, error) {
	for {
		var (
			r   rune
			err error
		)
		if skipped != 0 {
			r, skipped = skipped, 0
		} else {
			r, _, err = reader.ReadRune()
		}
		if err == io.EOF {
			return token{ttype: EOF}, true, nil
		}
		if err != nil {
			return token{}, true, fmt.Errorf("while reading: %w", err)
		}

		switch r {
		case ' ', '\t', '\n', '\r':
			// Whitespace between tokens is insignificant.
			continue
		case '(':
			return token{ttype: LBRACKET}, true, nil
		case ')':
			return token{ttype: RBRACKET}, true, nil
		case '#':
			return token{ttype: COMMENT}, false, nil
		case '`':
			return token{ttype: STRING_RAW}, false, nil
		case '\'':
			return token{ttype: STRING}, false, nil
		case 't', 'f', 'n':
			return token{ttype: CONST, t: string(r)}, false, nil
		case '1', '2', '3', '4', '5', '6', '7', '8', '9', '↊', '↋', '-', '.', '·', ',':
			return token{ttype: NUMBER, t: string(r)}, false, nil
		case '0':
			// One rune of look-ahead decides between a radix prefix and a
			// plain zero.
			var next rune
			next, _, err = reader.ReadRune()
			if err != nil && err != io.EOF {
				return token{}, true, fmt.Errorf("while reading: %w", err)
			}
			if err == nil {
				switch next {
				case 'b', 'o', 'x':
					return token{ttype: NUMBER, t: "0" + string(next)}, false, nil
				}
				// Not part of this token: hand it to the next read.
				skipped = next
			}
			// On io.EOF the zero is still a complete token and must not be
			// dropped; the end of input is reported by the following call,
			// which reads from the exhausted reader again.
			var zero int64
			return token{ttype: NUMBER, i: &zero}, true, nil
		default:
			// Unrecognised rune: report a zero (UNKNOWN) token.
			return token{}, true, nil
		}
	}
}

// nextToken_rest reads the remaining runes of the token t, whose kind was
// already chosen from its first rune by nextToken_initial, and returns the
// finished token. A rune left in skipped is consumed before reader is read.
//
// Per kind:
//
//   - COMMENT: text up to, but not including, the next '\n'.
//   - STRING: text up to the next unescaped single quote, then parseString.
//     A backslash escapes the following rune (both are kept in t.t). A '\n'
//     inside the string yields NewUnterminatedError.
//   - STRING_RAW: a small state machine, see the comments in the body.
//   - CONST / NUMBER: runes are accepted while they fit the kind; the first
//     rune that does not is pushed back into skipped and the text is handed
//     to parseConst / parseNumber.
//
// At end of input an open STRING or STRING_RAW yields NewUnterminatedError.
// A pending COMMENT, CONST or NUMBER is finished from the text read so far
// instead of being discarded; the end of input is then reported by the next
// call, which reads from the exhausted reader again. Any other kind yields an
// EOF token. Read failures other than io.EOF are returned wrapped.
func nextToken_rest(reader *bufio.Reader, t token) (token, error) {
	var (
		r                   rune
		err                 error  = nil
		escaping            bool   = false
		stringRawIndent     string = ""
		stringRawIndentSkip string = ""
		stringRawState      int    = 0 // todo enum
	)

tokenLoop:
	for {
		if skipped != 0 {
			r = skipped
			skipped = 0
		} else {
			r, _, err = reader.ReadRune()
		}
		//debugf("%c\n", r)
		// todo line, column
		if err != nil {
			if err == io.EOF {
				switch t.ttype {
				case STRING, STRING_RAW:
					return token{}, NewUnterminatedError("string", t.t)
				case COMMENT:
					// A comment on the last line needs no trailing newline.
					return t, nil
				case CONST:
					// End of input terminates the word like any other
					// non-matching rune would.
					t, err = parseConst(t)
					return t, err
				case NUMBER:
					t, err = parseNumber(t)
					return t, err
				default:
					return token{ttype: EOF}, nil
				}
			}
			return token{}, fmt.Errorf("while reading: %w", err)
		}

		switch t.ttype {
		case COMMENT:
			if r != '\n' {
				t.t += string(r)
			} else {
				break tokenLoop
			}
		case STRING:
			if !escaping && r == '\'' {
				t, err = parseString(t)
				if err != nil {
					return token{}, err
				}
				break tokenLoop
			} else if r == '\n' {
				return token{}, NewUnterminatedError("string", t.t)
			} else {
				t.t += string(r)
			}
			// An escape covers exactly one rune: clear the flag after the
			// escaped rune, set it after an unescaped backslash.
			if escaping {
				escaping = false
			} else if r == '\\' {
				escaping = true
			}
		case STRING_RAW:
			// State 0: the opening backtick must be followed by a newline.
			if stringRawState == 0 {
				if r != '\n' {
					return token{}, NewRawStringError("missing new line after opening `")
				} else {
					stringRawState = 1
					continue
				}
			}
			// State 1: leading spaces/tabs of the first line define the
			// indent; the first other rune starts the content.
			if stringRawState == 1 {
				if r == ' ' || r == '\t' {
					stringRawIndent += string(r)
				} else {
					stringRawState = 2
					t.t += string(r)
				}
				continue
			}
			// State 2: copy the rest of the line, including its '\n'.
			if stringRawState == 2 {
				// fixme assumes lines ending with \n; get to end of line
				if r == '\n' {
					stringRawState = 3
					stringRawIndentSkip = ""
				}
				t.t += string(r)
				continue
			}
			// State 3: start of a following line. A backtick as the very
			// first rune closes the string; otherwise the line must begin
			// with the same indent as the first line.
			if stringRawState == 3 {
				if len(stringRawIndentSkip) == 0 && r == '`' {
					break tokenLoop
				}
				if len(stringRawIndentSkip) < len(stringRawIndent) {
					stringRawIndentSkip += string(r)
				} else {
					if stringRawIndent != stringRawIndentSkip {
						// todo convert whitespace to escape codes
						return token{}, NewRawStringError("Indent ‘" + stringRawIndent + "’ does not begin with ‘" + stringRawIndentSkip + "’")
					}
					// Indent matched: r is the first content rune of this
					// line, so re-read it in state 2.
					skipped = r
					stringRawState = 2
				}
			}
		case CONST:
			if t.t[0] == 't' && in(r, []rune{'r', 'u', 'e'}) && len(t.t) < 4 {
				t.t += string(r)
				continue
			}
			if t.t[0] == 'f' && in(r, []rune{'a', 'l', 's', 'e'}) && len(t.t) < 5 {
				t.t += string(r)
				continue
			}
			if in(r, []rune{'u', 'l'}) && len(t.t) < 4 {
				t.t += string(r)
				continue
			}
			// r does not belong to the word: push it back and finish.
			skipped = r
			t, err = parseConst(t)
			break tokenLoop
		case NUMBER:
			// t.t[1] is only evaluated when t.t starts with '0';
			// nextToken_initial only produces such text with a two-rune
			// radix prefix ("0b", "0o", "0x").
			if t.t[0] == '0' && t.t[1] == 'b' && in(r, []rune{'0', '1', ','}) {
				t.t += string(r)
				continue
			}
			if t.t[0] == '0' && t.t[1] == 'o' && in(r, []rune{'0', '1', '2', '3', '4', '5', '6', '7'}) {
				t.t += string(r)
				continue
			}
			if t.t[0] == '0' && t.t[1] == 'x' && in(r, []rune{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F'}) {
				t.t += string(r)
				continue
			}
			if in(r, []rune{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '↊', '↋', ',', '.', '·', 'e', '-'}) {
				t.t += string(r)
				continue
			}
			// r does not belong to the number: push it back and finish.
			skipped = r
			t, err = parseNumber(t) // todo errors that are not CommaError <- NumberError
			break tokenLoop
		}
	}
	return t, err
}

// in reports whether the rune c occurs in expected. A nil or empty slice
// never matches.
func in(c rune, expected []rune) bool {
	for i := 0; i < len(expected); i++ {
		if expected[i] == c {
			return true
		}
	}
	return false
}
